\[\\[.4in]\]

Retrieving Forecast Data

Every week, forecasters submit their hospitalization predictions to COVID-19 ForecastHub. In this report, we rely on an AWS bucket that contains the estimates of a handful of signals (e.g., COVID death, cases, hospitalization, etc). Furthermore, the AWS server stores an array of evaluation metrics of these forecasts (e.g., Absolute Error, Weighted Interval Score, and 80% Coverage). Alternatively, the data can be retrieved from the publicly accessible covidcast and covideval APIs.

s3a <- get_bucket("forecast-eval", region = "us-east-2")
s3b <- get_bucket("forecasting-team-data")
scores1 <- s3readRDS("score_cards_state_hospitalizations.rds", s3a, 
                     region = "us-east-2")
scores1 <- subset(
  scores1, 
  forecaster %in% union(params$forecasters, "COVIDhub-baseline"))

# The Hub always makes forecasts on Monday (dofw 2)
# We move all forecast_dates to the following Monday and shorten the ahead
wday_shift <- function(x, base_dofw = 2) (base_dofw - wday(x)) %% 7
scores1 <- scores1 %>%
  mutate(ahead = ahead - wday_shift(forecast_date),
         forecast_date = forecast_date + wday_shift(forecast_date))

# need to pass in the right forecaster here
scores <- s3readRDS(params$aws_scores, s3b) %>%
  select(ahead, geo_value, forecaster, forecast_date, target_end_date,
         actual, wis, ae, cov_80, value_50, data_source, signal, 
         incidence_period) %>%
  # Logan's names are brutally long...
  mutate(forecaster = str_replace(forecaster, "predictors ", "predictors\n"))
our_pred_dates <- unique(scores$forecast_date)
forecast_dates <- our_pred_dates
aheads <- unique(scores$ahead)
geo_values <- unique(scores$geo_value)
veggies <- c("asparagus", "broccoli", "chard", "daikon", 
             "escarole", "fennel", "garlic", "horseradish",
             "jicama", "kohlrabi", "lettuce", "mushroom")
names_table <- tibble(forecaster = unique(scores$forecaster), veggies = veggies)
scores <- left_join(scores, names_table) %>% 
  select(-forecaster) %>%
  rename(forecaster = veggies)

results <- scores1 %>% 
  select(ahead, geo_value, forecaster, forecast_date, target_end_date,
         actual, wis, ae, cov_80, value_50, data_source, signal, 
         incidence_period) %>%
  bind_rows(scores) %>%
  filter(forecast_date %in% forecast_dates,
         ahead %in% aheads,
         geo_value %in% geo_values) 

The target forecast dates are:
2020-11-16, 2020-11-23, 2020-11-30, 2020-12-07, 2020-12-14, 2020-12-21, 2020-12-28, 2021-01-04, 2021-01-11, 2021-01-18, 2021-01-25, 2021-02-01, 2021-02-15, 2021-02-22, 2021-03-01, 2021-03-08, 2021-03-15, 2021-03-22, 2021-03-29, 2021-04-05, 2021-04-12, 2021-04-19, 2021-05-03, 2021-05-10, 2021-05-17, 2021-05-24, 2021-05-31, 2021-06-07, 2021-06-14, 2021-06-21, 2021-06-28, 2021-07-05, 2021-07-12, 2021-07-19, 2021-07-26, 2021-08-02, 2021-08-09, 2021-08-16, 2021-08-23, 2021-08-30, 2021-09-06, 2021-09-13, 2021-09-20, 2021-09-27

The template will compile data of the following forecasters:
COVIDhub-ensemble, USC-SI_kJalpha, MOBS-GLEAM_COVID, Karlen-pypm, JHUAPL-SLPHospEns.

For this analysis, all of Logan’s forecasters have been renamed:

kableExtra::kbl(names_table) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
forecaster veggies
cross-nonneg latest—-1d—count-predictors misaligned-1d————–count-targets train-on-28-days–all-wdays asparagus
cross-nonneg latest—-1d—count-predictors misaligned-1d————–count-targets train-on-all-time-all-wdays broccoli
cross-nonneg latest—-7dav-count-predictors misaligned-1d————–count-targets train-on-28-days–all-wdays chard
cross-nonneg latest—-7dav-count-predictors misaligned-1d————–count-targets train-on-all-time-all-wdays daikon
evalcast baseline escarole
noncrosstrain fixed-lag-1d—count-predictors aligned—-1d————–count-targets train-on-all-time-all-wdays fennel
noncrosstrain fixed-lag-1d—count-predictors aligned—-1d————–count-targets train-on-all-time-same-wday garlic
noncrosstrain latest—-1d—count-predictors aligned—-1d————–count-targets train-on-all-time-all-wdays horseradish
noncrosstrain latest—-1d—count-predictors aligned—-1d————–count-targets train-on-all-time-same-wday jicama
noncrosstrain latest—-1d—rate–predictors aligned—-1d————–rate–targets train-on-all-time-all-wdays kohlrabi
noncrosstrain latest—-7dav-rate–predictors aligned—-1d————–rate–targets train-on-all-time-all-wdays lettuce
noncrosstrain latest—-7dav-rate–predictors aligned—-7dav-with-fixup-rate–targets train-on-all-time-all-wdays mushroom

\[\\[.07in]\]

Weighted Interval Score (relative to baseline) and 80% Coverage

WIS by Forecast Date (GeoMean)

Mean <- function(x) mean(x, na.rm = TRUE)
GeoMean <- function(x, offset = 0) exp(Mean(log(x + offset)))

facets.label = str_glue("{aheads} days ahead")
names(facets.label) = aheads

subtitle = sprintf("Forecasts made over %s to %s",
                   format(min(forecast_dates), "%B %d, %Y"),
                   format(max(forecast_dates), "%B %d, %Y"))

plot_wis <-
  plot_canonical(results, 
                 x = "forecast_date", 
                 y = "wis", 
                 aggr = GeoMean,
                 grp_vars = c("forecaster","ahead"), 
                 facet_rows = "ahead", dots = FALSE,
                 base_forecaster = "COVIDhub-baseline") + 
  labs(title = subtitle, 
       x = "Forecast Dates", 
       y = "Geometric Mean WIS") +
  #geom_point(aes(forecast_date, round(wis, digits = 2), color)), alpha = 0.05) +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead=facets.label)) +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  scale_y_log10() +
  geom_hline(yintercept = 1, size = 1.5) +
  scale_color_viridis_d() +
  guides(color = guide_legend(ncol = 2))


ggplotly(plot_wis, tooltip="text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

WIS by Forecast Date (Mean)

plot_wis <-
  plot_canonical(results, 
                 x = "forecast_date", 
                 y = "wis", 
                 aggr = Mean,
                 grp_vars = c("forecaster","ahead"), 
                 facet_rows = "ahead", dots = FALSE,
                 base_forecaster = "COVIDhub-baseline") + 
  labs(title = subtitle, 
       x = "Forecast Dates", 
       y = "Geometric Mean WIS") +
  #geom_point(aes(forecast_date, round(wis, digits = 2), color)), alpha = 0.05) +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead=facets.label)) +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  scale_y_log10() +
  geom_hline(yintercept = 1, size = 1.5) +
  scale_color_viridis_d() +
  guides(color = guide_legend(ncol = 2))


ggplotly(plot_wis, tooltip="text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

WIS by Ahead (GeoMean)

plot_wis_a <-
  plot_canonical(results, 
                 x = "ahead", 
                 y = "wis", 
                 aggr = GeoMean,
                 grp_vars = c("forecaster"), 
                 dots = TRUE,
                 base_forecaster = "COVIDhub-baseline") + 
  labs(title = subtitle, 
       x = "Days ahead", 
       y = "Geometric Mean WIS") +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  geom_hline(yintercept = 1, size = 1.5) +
  scale_y_log10() +
  scale_color_viridis_d() +
  guides(color = guide_legend(ncol = 2))


ggplotly(plot_wis_a, tooltip="text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

WIS by Ahead (Mean)

plot_wis_a <-
  plot_canonical(results, 
                 x = "ahead", 
                 y = "wis", 
                 aggr = GeoMean,
                 grp_vars = c("forecaster"), 
                 dots = TRUE,
                 base_forecaster = "COVIDhub-baseline") + 
  labs(title = subtitle, 
       x = "Days ahead", 
       y = "Geometric Mean WIS") +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  geom_hline(yintercept = 1, size = 1.5) +
  scale_y_log10() +
  scale_color_viridis_d() +
  guides(color = guide_legend(ncol = 2))


ggplotly(plot_wis_a, tooltip="text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

% Coverage by Forecast Date

plot_cov80 <-
  plot_canonical(results, 
                 x = "forecast_date", 
                 y = "cov_80", 
                 aggr = mean,
                 grp_vars = c("forecaster","ahead"), 
                 facet_rows = "ahead",
                 dots = FALSE) +
  labs(title = subtitle, x= "Forecast date", y = "Mean Coverage 80") +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = facets.label)) +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  scale_color_viridis_d() +
  geom_hline(yintercept = 0.8, size = 1.5) +
  guides(color = guide_legend(ncol = 2))

ggplotly(plot_cov80, tooltip="text", height=800, width=1000) 

% Coverage by Ahead

plot_cov80_a <-
  plot_canonical(results, 
                 x = "ahead", 
                 y = "cov_80", 
                 aggr = mean,
                 grp_vars = "forecaster", 
                 dots = TRUE) +
  labs(title = subtitle, x= "Days ahead", y = "Mean Coverage 80") +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  scale_color_viridis_d() +
  geom_hline(yintercept = 0.8, size = 1.5) +
  guides(color = guide_legend(ncol = 2))

ggplotly(plot_cov80_a, tooltip="text", height=800, width=1000) 

Maps

library(sf)

results_intersect <- intersect_averagers(
  scores, c("forecaster"), c("forecast_date", "geo_value")) %>%
  select(c("ahead", "geo_value", "forecaster","forecast_date", "data_source", "signal","target_end_date","incidence_period","actual","wis","ae","cov_80"))

maps <- results_intersect %>%
  group_by(geo_value, forecaster) %>%
  summarise(wis = Mean(wis),
            cov_80 = Mean(cov_80)) %>%
  left_join(animalia::state_population, by = "geo_value") %>%
  mutate(wis =  wis / population * 1e5) %>%
  pivot_longer(c("wis", "cov_80"), names_to = "score") %>%
  group_by(score) %>%
  mutate(time_value = Sys.Date(),
         max = max(value), min = min(value)) %>%
  group_by(forecaster, .add = TRUE) 
keys <- maps %>% group_keys()
maps <- maps %>% group_split()

levs <- levels(maps[[1]]$score)

# for county prediction, set geo_type = "county"
maps <- purrr::map(maps, 
                   ~as.covidcast_signal(
                     .x, signal = .x$score[1], 
                     data_source = .x$forecaster[1], 
                     geo_type = "state"))

maps <- purrr::map2(
  maps, keys$score,
  ~plot(.x, 
        choro_col = scales::viridis_pal()(3),
        range = if (.y == "wis") c(.x$min[1], .x$max[1]) else c(0,1)))

Mean Weighted Interval Score

cowplot::plot_grid(plotlist = maps[keys$score == "wis"], ncol = 3)

Coverage 80

cowplot::plot_grid(plotlist = maps[keys$score == "cov_80"], ncol = 3)